1 Introduction

This part includes descriptive statistics after feature extraction, data cleaning, and preparation.

2 Data preparation

2.1 Source setup

########## folders ##########
# All project folders are resolved relative to the parent of the working
# directory, so set the working directory to this file's location first
# (Session -> Set Working Directory -> To Source File Location).
parentfolder <- dirname(getwd())

# Every folder path keeps its trailing slash so that a file name can be
# appended directly with paste0().
data <- paste0(parentfolder, "/MultIS_data/")
audiodata <- paste0(parentfolder, "/audio_processed/")
syllables <- paste0(audiodata, "syllables/")
dataworkspace <- paste0(parentfolder, "/data_processed/")
datamerged <- paste0(parentfolder, "/data_merged/")
datasets <- paste0(parentfolder, "/datasets/")
models <- paste0(parentfolder, "/models/")
plots <- paste0(parentfolder, "/plots/")
scripts <- paste0(parentfolder, "/scripts/")

########## source file ##########

#source(paste0(scripts, "adjectives-preparation.R"))

#################### packages ####################
# Data Manipulation
library(tibble)
library(stringr)
library(tidyverse) # includes readr, tidyr, dplyr, ggplot2

# Plotting
library(ggforce)
library(ggpubr)
library(gridExtra)

# Eight-colour palette handed to scale_fill_manual() by the plots in this file
colorBlindBlack8 <- c(
  "#000000", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

2.2 Load in data frames

# Load participant information (semicolon-delimited file in the raw-data folder)
participant_info <- read_delim(paste0(data,"ParticipantInfo_GERCAT.csv"), delim = ";")

# Load the information about duration of each segment (if needed)
data_df <- read.table(paste0(syllables, "fileDurationsDF.csv"), header = TRUE, sep = ',')

# Load cleaned syllable data
# NOTE(review): this reassigns `data`, which up to this point held the raw-data
# folder path used by the read_delim() call above. Any later paste0(data, ...)
# would receive a data frame instead of a path, so this statement has to stay
# after every read from the raw-data folder (or one of the two should be renamed).
data <- read_csv(paste0(datasets, "data_cleaned.csv"))

# Load cleaned targets data
targets <- read_csv(paste0(datasets, "targets.csv"))

# Load cleaned targets with pre-post data
data_prepost <- read_csv(paste0(datasets, "data_prepost.csv"))

2.3 Optional: add participant info

# Reduce the participant identifier to its numeric part
participant_info[["Participant"]] <- parse_number(participant_info[["Participant"]])

# Optional: attach the participant information to a data frame of your choice
# by merging on "Participant" and "Language" (replace META with that data frame)
# META <- merge(META, participant_info, by = c("Participant", "Language"), all.x = TRUE)

3 Descriptive statistics and visualizations

3.1 Missing values

How many NAs are there?

# Columns to process: the unsuffixed feature columns first, then "syllTextPre"
# and the same features with the suffix "Pre", then "syllTextPost" and the same
# features with the suffix "Post".
columns_to_process <- local({
  features <- c(
    "duration", "duration_noSilence",
    "ampl_median", "ampl_sd",
    "ampl_noSilence_median", "ampl_noSilence_sd",
    "env_slope",
    "amEnvDep_median", "amEnvDep_sd",
    "pitch_median", "pitch_sd",
    "f0_slope",
    "fmDep_median", "fmDep_sd",
    "specCentroid_median", "specCentroid_sd",
    "entropy_median", "entropy_sd",
    "entropySh_median", "entropySh_sd",
    "flux_median", "flux_sd",
    "CPP_median", "CPP_sd",
    "novelty_median", "novelty_sd",
    "HNR_median", "HNR_sd"
  )
  c(
    features,
    "syllTextPre", paste0(features, "Pre"),
    "syllTextPost", paste0(features, "Post")
  )
})

# Ensure columns to process are numeric
# First keep only the candidates that exist in data_prepost ...
columns_to_process <- columns_to_process[columns_to_process %in% names(data_prepost)]
# ... then only the numeric ones. vapply() always returns a logical vector,
# also when no candidate is left; sapply() returns an empty list in that case,
# which is not a valid subscript.
columns_to_process <- columns_to_process[
  vapply(data_prepost[columns_to_process], is.numeric, logical(1))
]

# Order column names so that every base variable is preceded by its "Pre"
# column and followed by its "Post" column.
#
# columns: character vector of column names.
# Returns the reordered names (NULL when there is no base variable). Base
# variables are the names that do not end in "Pre" or "Post"; they keep their
# original order. A suffixed name is only included when its base variable is
# part of `columns`.
sort_columns <- function(columns) {
  has_suffix <- grepl("Pre$|Post$", columns)
  base_vars <- columns[!has_suffix]

  # One small vector per base variable: Pre (if present), base, Post (if present)
  per_variable <- lapply(base_vars, function(base_var) {
    candidates <- c(paste0(base_var, "Pre"), base_var, paste0(base_var, "Post"))
    present <- c(candidates[1] %in% columns, TRUE, candidates[3] %in% columns)
    candidates[present]
  })

  unlist(per_variable, use.names = FALSE)
}
# Apply the Pre / main / Post ordering to the columns that passed the checks above
columns_to_process <- sort_columns(columns_to_process)

# Function to calculate raw number and proportion of NAs
#
# Arguments:
#   df            data frame (or tibble) to inspect
#   columns       character vector with the names of the columns to check
#   group_column  name of the column of `df` that defines the groups
#
# Returns a data frame with one row per entry of `columns`: `Variable`, one
# "<group>_NA" column per distinct value of the group column (in order of first
# appearance) and `Total_NA`. Every cell is a string "count / percentage%"; the
# percentage is relative to the number of rows of that group (of `df` for the
# total) and rounded to two decimals.
calculate_na_stats <- function(df, columns, group_column) {
  # Format counts as "count / proportion%"
  format_na <- function(na_counts, n_rows) {
    paste0(na_counts, " / ", round(na_counts / n_rows * 100, 2), "%")
  }

  # Logical NA matrix (rows x columns). df[columns] stays a data frame even for
  # a single column, so colSums() always receives two dimensions.
  na_flags <- is.na(df[columns])
  group_values <- df[[group_column]]

  # Initialize a data frame for results
  result_df <- data.frame(Variable = columns)

  # Calculate NA stats for each group
  for (group in unique(group_values)) {
    # %in% never yields NA, so rows with a missing group value are not dragged
    # into the other groups as all-NA rows; they form their own "NA" group.
    in_group <- group_values %in% group
    result_df[[paste0(group, "_NA")]] <- format_na(
      colSums(na_flags[in_group, , drop = FALSE]),
      sum(in_group)
    )
  }

  # Add total NA stats
  result_df[["Total_NA"]] <- format_na(colSums(na_flags), nrow(df))

  result_df
}

# NA counts and percentages of the selected columns, per language and overall
na_stats_before <- calculate_na_stats(
  df = data_prepost,
  columns = columns_to_process,
  group_column = "language"
)

# Show the resulting table
print(na_stats_before)
##                     Variable   Catalan_NA    German_NA      Total_NA
## 1                durationPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 2                   duration       0 / 0%       0 / 0%        0 / 0%
## 3               durationPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 4      duration_noSilencePre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 5         duration_noSilence    1 / 0.04%       0 / 0%     1 / 0.02%
## 6     duration_noSilencePost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 7             ampl_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 8                ampl_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 9            ampl_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 10                ampl_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 11                   ampl_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 12               ampl_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 13  ampl_noSilence_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 14     ampl_noSilence_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 15 ampl_noSilence_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 16      ampl_noSilence_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 17         ampl_noSilence_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 18     ampl_noSilence_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 19              env_slopePre 236 / 10.51%  115 / 4.91%   351 / 7.65%
## 20                 env_slope   22 / 0.98%   22 / 0.94%    44 / 0.96%
## 21             env_slopePost 317 / 14.11%   88 / 3.75%   405 / 8.82%
## 22        amEnvDep_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 23           amEnvDep_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 24       amEnvDep_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 25            amEnvDep_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 26               amEnvDep_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 27           amEnvDep_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 28           pitch_medianPre 361 / 16.07% 297 / 12.67%  658 / 14.34%
## 29              pitch_median  100 / 4.45%  188 / 8.02%   288 / 6.27%
## 30          pitch_medianPost 517 / 23.02% 523 / 22.31% 1040 / 22.66%
## 31               pitch_sdPre 361 / 16.07% 297 / 12.67%  658 / 14.34%
## 32                  pitch_sd  100 / 4.45%  188 / 8.02%   288 / 6.27%
## 33              pitch_sdPost 517 / 23.02% 523 / 22.31% 1040 / 22.66%
## 34               f0_slopePre  411 / 18.3% 265 / 11.31%  676 / 14.73%
## 35                  f0_slope  130 / 5.79%   98 / 4.18%   228 / 4.97%
## 36              f0_slopePost 466 / 20.75% 431 / 18.39%  897 / 19.54%
## 37           fmDep_medianPre 705 / 31.39% 512 / 21.84% 1217 / 26.51%
## 38              fmDep_median 236 / 10.51%   225 / 9.6%  461 / 10.04%
## 39          fmDep_medianPost 678 / 30.19% 678 / 28.92% 1356 / 29.54%
## 40               fmDep_sdPre 705 / 31.39% 512 / 21.84% 1217 / 26.51%
## 41                  fmDep_sd 236 / 10.51%   225 / 9.6%  461 / 10.04%
## 42              fmDep_sdPost 678 / 30.19% 678 / 28.92% 1356 / 29.54%
## 43    specCentroid_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 44       specCentroid_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 45   specCentroid_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 46        specCentroid_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 47           specCentroid_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 48       specCentroid_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 49         entropy_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 50            entropy_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 51        entropy_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 52             entropy_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 53                entropy_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 54            entropy_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 55       entropySh_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 56          entropySh_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 57      entropySh_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 58           entropySh_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 59              entropySh_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 60          entropySh_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 61            flux_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 62               flux_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 63           flux_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 64                flux_sdPre  185 / 8.24%   18 / 0.77%   203 / 4.42%
## 65                   flux_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 66               flux_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 67             CPP_medianPre 361 / 16.07% 297 / 12.67%  658 / 14.34%
## 68                CPP_median  100 / 4.45%  188 / 8.02%   288 / 6.27%
## 69            CPP_medianPost 517 / 23.02% 523 / 22.31% 1040 / 22.66%
## 70                 CPP_sdPre 361 / 16.07% 297 / 12.67%  658 / 14.34%
## 71                    CPP_sd  100 / 4.45%  188 / 8.02%   288 / 6.27%
## 72                CPP_sdPost 517 / 23.02% 523 / 22.31% 1040 / 22.66%
## 73         novelty_medianPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 74            novelty_median    1 / 0.04%       0 / 0%     1 / 0.02%
## 75        novelty_medianPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 76             novelty_sdPre  183 / 8.15%   17 / 0.73%   200 / 4.36%
## 77                novelty_sd    1 / 0.04%       0 / 0%     1 / 0.02%
## 78            novelty_sdPost 251 / 11.18%   67 / 2.86%   318 / 6.93%
## 79             HNR_medianPre 241 / 10.73%  116 / 4.95%   357 / 7.78%
## 80                HNR_median   22 / 0.98%   73 / 3.11%    95 / 2.07%
## 81            HNR_medianPost 347 / 15.45% 284 / 12.12%  631 / 13.75%
## 82                 HNR_sdPre 298 / 13.27%  207 / 8.83%     505 / 11%
## 83                    HNR_sd   48 / 2.14%   129 / 5.5%   177 / 3.86%
## 84                HNR_sdPost 422 / 18.79% 416 / 17.75%  838 / 18.26%

Save the NA table.

# Export the NA table (without row names) to the datasets folder
write.csv(
  na_stats_before,
  file = paste0(datasets, "NA_stats.csv"),
  row.names = FALSE
)

3.2 Frequencies and proportions

How many target syllables do we have per language?

# Number of rows in `targets` for each language
summarise(
  group_by(targets, language),
  Cumulative_Count = n()
)

And how are they distributed across perceived prosodic prominence ratings?

# Count of target syllables for every language x perceived-prominence
# combination, plus the share of each rating within its language.
# `.groups = "drop_last"` states explicitly that the counts stay grouped by
# language, so sum(Count) is the language total; the final ungroup() keeps the
# stored table from carrying that grouping into later steps.
syll_per_pros <-
  targets %>%
  group_by(language, percProm) %>%
  summarize(Count = n(), .groups = "drop_last") %>%
  mutate(Proportion = Count / sum(Count)) %>%
  ungroup()

## Count
# Side-by-side bars: number of target syllables per prominence rating, one bar
# colour per language
syll_per_pros %>%
  ggplot(aes(x = percProm, y = Count, fill = language)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.7) +
  labs(x = "Perceived prominence", y = "Count") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_count.png"),
  plot = last_plot(), width = 6, height = 4
)

## Proportion
# Side-by-side bars: share of each prominence rating, one bar colour per language
syll_per_pros %>%
  ggplot(aes(x = percProm, y = Proportion, fill = language)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.7) +
  labs(x = "Perceived prominence", y = "Proportion") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_prop.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3 Averages by perceived prominence

3.3.1 Duration

What is the average duration across the different prosodic prominence ratings in Catalan vs in German?

# Mean `duration` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_duration = mean(duration, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `duration` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = duration, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "Duration (total)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_duration.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.2 Duration without silences

What is the average sounding duration (i.e., the duration without silences) across the different prosodic prominence ratings in Catalan vs. in German?

# Mean `duration_noSilence` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_duration_noSilence = mean(duration_noSilence, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `duration_noSilence` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = duration_noSilence, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Duration (without silences)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_duration_noSilence.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.3 Amplitude (median)

What is the average amplitude (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `ampl_median` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_ampl_median = mean(ampl_median, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `ampl_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "Amplitude (medians)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_ampl_median.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.4 Amplitude (sd)

What is the average amplitude (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `ampl_sd` for every language x prominence rating (NAs ignored).
# The result column is named after the summarised variable (it was labelled
# avg_ampl_median although it averages ampl_sd).
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_ampl_sd = mean(ampl_sd, na.rm = TRUE))

Let’s plot it.

# Distribution of `ampl_sd` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "Amplitude (sd)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_ampl_sd.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.5 Amplitude (median) without silences

What is the average amplitude (median) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Mean `ampl_noSilence_median` for every language x prominence rating
# (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_ampl_noSilence_median = mean(ampl_noSilence_median, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `ampl_noSilence_median` per language, one violin + box plot
# per prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_noSilence_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Amplitude without silences (medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_median.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.6 Amplitude (sd) without silences

What is the average amplitude (sd) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Mean `ampl_noSilence_sd` for every language x prominence rating (NAs ignored).
# The result column is named after the summarised variable (it was labelled
# avg_ampl_noSilence_median although it averages ampl_noSilence_sd).
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_ampl_noSilence_sd = mean(ampl_noSilence_sd, na.rm = TRUE))

Let’s plot it.

# Distribution of `ampl_noSilence_sd` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_noSilence_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Amplitude without silences (sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_sd.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.7 Amplitude envelope slope

What is the average amplitude envelope slope across the different prosodic prominence ratings in Catalan vs in German?

# Mean `env_slope` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_env_slope = mean(env_slope, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `env_slope` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = env_slope, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Amplitude envelope slope",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_env_slope.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.8 Pitch (median)

What is the average pitch (median) across the different prosodic prominence ratings in Catalan vs in German?

# Raw: mean `pitch_median` for every language x prominence rating (NAs ignored)
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_median = mean(pitch_median, na.rm = TRUE))
# Normalized: same summary of `pitch_median_norm`; the result column carries
# the `_norm` suffix so the two printed tables cannot be mixed up
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_median_norm = mean(pitch_median_norm, na.rm = TRUE))

Let’s plot it.

# Raw
# Distribution of `pitch_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f0 (raw medians)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_pitch_median_raw.png"),
  plot = last_plot(), width = 6, height = 4
)

# Normalized
# Distribution of `pitch_median_norm` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_median_norm, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "f0 (normalized medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_pitch_median_norm.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.9 Pitch (sd)

What is the average pitch (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Raw: mean `pitch_sd` for every language x prominence rating (NAs ignored)
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_sd = mean(pitch_sd, na.rm = TRUE))
# Normalized: same summary of `pitch_sd_norm`; the result column carries the
# `_norm` suffix so the two printed tables cannot be mixed up
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_sd_norm = mean(pitch_sd_norm, na.rm = TRUE))

Let’s plot it.

# Raw
# Distribution of `pitch_sd` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f0 (raw sd)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_raw.png"),
  plot = last_plot(), width = 6, height = 4
)

# Normalized
# Distribution of `pitch_sd_norm` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sd_norm, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f0 (normalized sd)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_norm.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.10 F0 slope

What is the average f0 slope across the different prosodic prominence ratings in Catalan vs in German?

# Raw: mean `f0_slope` for every language x prominence rating (NAs ignored)
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f0_slope = mean(f0_slope, na.rm = TRUE))
# Normalized: same summary of `f0_slope_norm`; the result column carries the
# `_norm` suffix so the two printed tables cannot be mixed up
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f0_slope_norm = mean(f0_slope_norm, na.rm = TRUE))

Let’s plot it.

# Raw
# Distribution of `f0_slope` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f0_slope, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f0 slope (raw)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_f0_slope_raw.png"),
  plot = last_plot(), width = 6, height = 4
)

# Normalized
# Distribution of `f0_slope_norm` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
ggplot(targets %>% filter(!is.na(percProm)), aes(x = language, y = f0_slope_norm, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(width = 0.1, outlier.shape = NA, position = position_dodge(width = 0.9), alpha = 0.5) +
  labs(
    x = "Language",
    y = "f0 slope (normalized)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  # Zoom the y axis to [-4, 4] without discarding observations: ylim() turns
  # values outside the range into NA before the violin densities and box plot
  # statistics are computed, whereas coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(-4, 4)) +
  theme_minimal()

# Save the plot above
ggsave(filename = paste0(plots, "prominence_f0_slope_norm.png"), plot = last_plot(), width = 6, height = 4)

3.3.11 F1 (median)

What is the average f1 (median) across the different prosodic prominence ratings in Catalan vs in German?

# Raw: mean `f1_freq_median` for every language x prominence rating
# (NAs ignored)
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f1_freq_median = mean(f1_freq_median, na.rm = TRUE))
# Normalized: same summary of `f1_freq_median_norm`; the result column carries
# the `_norm` suffix so the two printed tables cannot be mixed up
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f1_freq_median_norm = mean(f1_freq_median_norm, na.rm = TRUE))

Let’s plot it.

# Raw
# Distribution of `f1_freq_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f1_freq_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f1 (raw medians)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_f1_freq_median_raw.png"),
  plot = last_plot(), width = 6, height = 4
)

# Normalized
# Distribution of `f1_freq_median_norm` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f1_freq_median_norm, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "f1 (normalized medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_f1_freq_median_norm.png"),
  plot = last_plot(), width = 6, height = 4
)

We will not use f1 because we cannot be sure that the distribution of vowels is even across perceived prominence ratings.

3.3.12 F2 (median)

What is the average f2 (median) across the different prosodic prominence ratings in Catalan vs in German?

# Raw: mean `f2_freq_median` for every language x prominence rating
# (NAs ignored)
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f2_freq_median = mean(f2_freq_median, na.rm = TRUE))
# Normalized: same summary of `f2_freq_median_norm`; the result column carries
# the `_norm` suffix so the two printed tables cannot be mixed up
targets %>%
  group_by(language, percProm) %>%
  summarize(avg_f2_freq_median_norm = mean(f2_freq_median_norm, na.rm = TRUE))

Let’s plot it.

# Raw
# Distribution of `f2_freq_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f2_freq_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "f2 (raw medians)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_f2_freq_median_raw.png"),
  plot = last_plot(), width = 6, height = 4
)

# Normalized
# Distribution of `f2_freq_median_norm` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f2_freq_median_norm, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "f2 (normalized medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_f2_freq_median_norm.png"),
  plot = last_plot(), width = 6, height = 4
)

We will not use f2 because we cannot be sure that the distribution of vowels is even across perceived prominence ratings.

3.3.13 CPP (median)

What is the average CPP (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `CPP_median` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_CPP_median = mean(CPP_median, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `CPP_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "CPP (medians)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_CPP_median.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.14 CPP (sd)

What is the average CPP (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `CPP_sd` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_CPP_sd = mean(CPP_sd, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `CPP_sd` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "CPP (sd)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_CPP_sd.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.15 Flux (median)

What is the average flux (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `flux_median` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_flux_median = mean(flux_median, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `flux_median` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
ggplot(targets %>% filter(!is.na(percProm)), aes(x = language, y = flux_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(width = 0.1, outlier.shape = NA, position = position_dodge(width = 0.9), alpha = 0.5) +
  labs(
    x = "Language",
    y = "Flux (medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  # Because of outliers, especially in Catalan, only the range [0, 0.1] is
  # shown. coord_cartesian() crops the view; ylim() would instead turn values
  # above 0.1 into NA before the violin densities and box plot statistics are
  # computed, so the drawn medians and quartiles would no longer describe the
  # full data.
  coord_cartesian(ylim = c(0, 0.1)) +
  theme_minimal()

# Save the plot above
ggsave(filename = paste0(plots, "prominence_flux_median.png"), plot = last_plot(), width = 6, height = 4)

3.3.16 Flux (sd)

What is the average flux (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean `flux_sd` for every language x prominence rating (NAs ignored)
summarise(
  group_by(targets, language, percProm),
  avg_flux_sd = mean(flux_sd, na.rm = TRUE)
)

Let’s plot it.

# Distribution of `flux_sd` per language, one violin + box plot per
# prominence rating; rows without a rating are left out.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = flux_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  # Narrow box plots on top of the violins, outlier points hidden
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "Language", y = "Flux (sd)", fill = "Prosodic prominence") +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Save the plot above
ggsave(
  filename = paste0(plots, "prominence_flux_sd.png"),
  plot = last_plot(), width = 6, height = 4
)

3.3.17 Novelty (median)

What is the average novelty (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of novelty_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_novelty_median = mean(novelty_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of novelty (median) per language, filled by prominence
# rating. Targets without a prominence rating are excluded; box-plot outliers
# are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = novelty_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Novelty (medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Saved as PNG like every other prominence_* figure in this section
# (this one was previously written as a PDF).
ggsave(filename = paste0(plots, "prominence_novelty_median.png"), plot = last_plot(), width = 6, height = 4)

3.3.18 Novelty (sd)

What is the average novelty (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of novelty_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_novelty_sd = mean(novelty_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of novelty (sd) per language, filled by prominence rating.
# Targets without a prominence rating are excluded; box-plot outliers are not
# drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = novelty_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Novelty (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_novelty_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.19 Spectral centroid (median)

What is the average spectral centroid (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of specCentroid_median for every language x prominence-rating
# (percProm) combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_specCentroid_median = mean(specCentroid_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of spectral centroid (median) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = specCentroid_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Spectral centroid (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_specCentroid_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.20 Spectral centroid (sd)

What is the average spectral centroid (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of specCentroid_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_specCentroid_sd = mean(specCentroid_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of spectral centroid (sd) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = specCentroid_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Spectral centroid (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_specCentroid_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.21 Wiener entropy (median)

What is the average Wiener entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropy_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_entropy_median = mean(entropy_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of Wiener entropy (median) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropy_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Wiener Entropy (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_entropy_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.22 Wiener entropy (sd)

What is the average Wiener entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropy_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_entropy_sd = mean(entropy_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of Wiener entropy (sd) per language, filled by prominence
# rating. Targets without a prominence rating are excluded; box-plot outliers
# are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropy_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Wiener Entropy (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_entropy_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.23 Shannon entropy (median)

What is the average Shannon entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropySh_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_entropySh_median = mean(entropySh_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of Shannon entropy (median) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropySh_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Shannon Entropy (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_entropySh_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.24 Shannon entropy (sd)

What is the average Shannon entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropySh_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_entropySh_sd = mean(entropySh_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of Shannon entropy (sd) per language, filled by prominence
# rating. Targets without a prominence rating are excluded; box-plot outliers
# are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropySh_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Shannon Entropy (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_entropySh_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.25 Harmonics-to-Noise Ratio (median)

What is the average Harmonics-to-Noise Ratio (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of HNR_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_HNR_median = mean(HNR_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of HNR (median) per language, filled by prominence rating.
# Targets without a prominence rating are excluded; box-plot outliers are not
# drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "HNR (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_HNR_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.26 Harmonics-to-Noise Ratio (sd)

What is the average Harmonics-to-Noise Ratio (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of HNR_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_HNR_sd = mean(HNR_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of HNR (sd) per language, filled by prominence rating.
# Targets without a prominence rating are excluded; box-plot outliers are not
# drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "HNR (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_HNR_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.27 Amplitude modulation (median)

What is the average amplitude modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of amEnvDep_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_amEnvDep_median = mean(amEnvDep_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of amplitude modulation (median) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = amEnvDep_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Amplitude modulation (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.28 Amplitude modulation (sd)

What is the average amplitude modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of amEnvDep_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_amEnvDep_sd = mean(amEnvDep_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of amplitude modulation (sd) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = amEnvDep_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Amplitude modulation (sd)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_sd.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.29 Frequency modulation (median)

What is the average frequency modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of fmDep_median for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_fmDep_median = mean(fmDep_median, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of frequency modulation (median) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_median, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(x = "Language", y = "Frequency modulation (medians)", fill = "Prosodic prominence") +
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(
  filename = paste0(plots, "prominence_fmDep_median.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.3.30 Frequency modulation (sd)

What is the average frequency modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of fmDep_sd for every language x prominence-rating (percProm)
# combination; missing measurements are ignored.
group_by(targets, language, percProm) %>%
  summarise(avg_fmDep_sd = mean(fmDep_sd, na.rm = TRUE))

Let’s plot it.

# Violin + box plot of frequency modulation (sd) per language, filled by
# prominence rating. Targets without a prominence rating are excluded;
# box-plot outliers are not drawn as points.
targets %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_sd, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, outlier.shape = NA, alpha = 0.5,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Frequency modulation (sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  # Zoom with coord_cartesian() rather than ylim(): ylim() removes the
  # out-of-range observations *before* densities and quartiles are computed,
  # which distorts the violins and boxes. coord_cartesian() only crops the view.
  coord_cartesian(ylim = c(-0.1, 0.5)) + # because of outliers
  theme_minimal()

# Write the plot that was just built to the plots folder.
ggsave(filename = paste0(plots, "prominence_fmDep_sd.png"), plot = last_plot(), width = 6, height = 4)

3.4 Pre- and posttonic

Prepare data frame.

# Reshape the pre/target/post measurements into long format: the identifying
# columns are kept, and every selected measurement column becomes one
# (variable, value) row per original row.
data_long <- data_prepost %>%
  select(fileName, language, itemType, focus, annotationNum,
         starts_with("pitch_"),
         starts_with("env_"),
         starts_with("duration"),
         starts_with("ampl_"),
         starts_with("f0_"),
         starts_with("f1_freq_"),
         starts_with("f2_freq_"),
         starts_with("CPP_"),
         starts_with("flux_"),
         starts_with("novelty_"),
         starts_with("specCentroid_"),
         starts_with("entropy_"),
         starts_with("entropySh_"),
         starts_with("HNR_"),
         starts_with("amEnvDep_"),
         starts_with("fmDep_")) %>%
  pivot_longer(cols = -c(fileName, language, itemType, focus, annotationNum),
               names_to = "variable",
               values_to = "value") %>%
  mutate(
    # The phase is encoded as a "Pre"/"Post" suffix on the column name;
    # columns without such a suffix are treated as the target syllable.
    phase = case_when(
      grepl("Pre$", variable) ~ "pre",
      grepl("Post$", variable) ~ "post",
      TRUE ~ "target"
    ),
    # Order the levels chronologically so plots read pre -> target -> post.
    phase = factor(phase, levels = c("pre", "target", "post")),
    # Strip only the trailing phase suffix, mirroring the anchored patterns
    # above. An unanchored gsub("Pre|Post", ...) would also delete these
    # substrings from the middle of a variable name.
    variable = sub("(Pre|Post)$", "", variable)
  )

3.4.1 Variable x Language plots

# For every variable x language combination, save two figures of the value
# distribution across syllable positions (pre / target / post):
#   prepost_<variable>_<language>.png        violins + boxes only
#   prepost_<variable>_lines_<language>.png  same plot plus one grey line per
#                                            fileName x annotationNum
languages <- unique(data_long$language)
variables <- unique(data_long$variable)

for (var in variables) {
  for (lang in languages) {
    # Rows of data_long for the current language and variable
    data_filtered <- subset(data_long, variable == var & language == lang)

    # Without lines
    p <- ggplot(data = data_filtered, aes(x = phase, y = value, fill = phase)) +
      geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
      geom_boxplot(width = 0.1, outlier.shape = NA, position = position_dodge(width = 0.9), alpha = 0.5) +
      scale_fill_manual(values = colorBlindBlack8) +
      labs(x = "Syllable",
           y = var,
           title = lang) +
      theme_minimal() +
      theme(legend.position = "none")

    # `plots` already ends in "/", so no extra separator is added here
    # (the previous "/prepost_" produced a double slash in the path).
    file_name <- paste0(plots, "prepost_", var, "_", lang, ".png")
    ggsave(filename = file_name, plot = p, width = 10, height = 8)

    # With lines: reuse the plot above and add the line layer on top of it
    p <- p +
      geom_line(aes(group = interaction(fileName, annotationNum)), color = "grey", alpha = 0.1)

    file_name <- paste0(plots, "prepost_", var, "_lines_", lang, ".png")
    ggsave(filename = file_name, plot = p, width = 10, height = 8)
  }
}

# Clean up the environment
rm(languages, lang, variables, var, data_filtered)

3.4.2 Variables, Language side-by-side

# Variables to plot: every distinct measurement name in data_long
variables <- unique(data_long$variable)

# Without lines: one figure per variable, filled by language
for (var in variables) {
  # Rows of data_long that belong to the current variable
  data_filtered <- data_long %>% filter(variable == var)

  # Violins with narrow box plots (outlier points hidden) per syllable position
  p <- data_filtered %>%
    ggplot(aes(x = phase, y = value, fill = language)) +
    geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
    geom_boxplot(
      width = 0.1, outlier.shape = NA, alpha = 0.5,
      position = position_dodge(width = 0.9)
    ) +
    labs(x = "Syllable", y = var) +
    scale_fill_manual(values = colorBlindBlack8) +
    theme_minimal()

  # Output file is named after the variable
  file_name <- paste0(plots, "prepost_", var, ".png")
  ggsave(file_name, plot = p, width = 10, height = 8)
}
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 800 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 800 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 518 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 522 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 522 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1083 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1083 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1520 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1520 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# With lines: as above, plus one grey line per fileName x annotationNum
for (var in variables) {
  # Rows of data_long that belong to the current variable
  data_filtered <- data_long %>% filter(variable == var)

  # Violins, narrow box plots (outlier points hidden) and the line layer
  p <- data_filtered %>%
    ggplot(aes(x = phase, y = value, fill = language)) +
    geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
    geom_boxplot(
      width = 0.1, outlier.shape = NA, alpha = 0.5,
      position = position_dodge(width = 0.9)
    ) +
    geom_line(
      aes(group = interaction(fileName, annotationNum)),
      color = "grey", alpha = 0.1
    ) +
    labs(x = "Syllable", y = var) +
    scale_fill_manual(values = colorBlindBlack8) +
    theme_minimal()

  # Output file is named after the variable, with a "_lines" suffix
  file_name <- paste0(plots, "prepost_", var, "_lines", ".png")
  ggsave(file_name, plot = p, width = 10, height = 8)
}
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 800 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 800 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 781 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 518 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 518 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1729 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1801 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1729 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1986 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1916 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 522 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 522 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 521 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1083 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1083 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1045 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1520 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1520 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1465 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 519 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 518 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2883 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3034 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2883 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Clean up: remove the temporary objects `variables`, `var` and
# `data_filtered` from the workspace so they do not linger in later sections
rm(variables, var, data_filtered)

3.4.3 Pretonic: Averages prominence

3.4.3.1 Duration

What is the average duration of pretonic across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean pretonic duration per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_duration = mean(durationPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = durationPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic duration (total)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_duration_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.2 Duration without silences

What is the average duration of the sounding (non-silent) part of the pretonic across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean pretonic duration without silences per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_duration_noSilence = mean(duration_noSilencePre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = duration_noSilencePre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic duration (without silences)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_duration_noSilence_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.3 Amplitude (median)

What is the average amplitude (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude medians per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_median = mean(ampl_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.4 Amplitude (sd)

What is the average amplitude (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_sd = mean(ampl_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.5 Amplitude (median) without silences

What is the average amplitude (median) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude medians (silences excluded) per language and
# prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(
    avg_ampl_noSilence_median = mean(ampl_noSilence_medianPre, na.rm = TRUE)
  )

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = ampl_noSilence_medianPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude without silences (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.6 Amplitude (sd) without silences

What is the average amplitude (sd) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude sd (silences excluded) per language and
# prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_noSilence_sd = mean(ampl_noSilence_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = ampl_noSilence_sdPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude without silences (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.7 Amplitude envelope slope

What is the average amplitude envelope slope across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean pretonic amplitude envelope slope per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_env_slope = mean(env_slopePre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = env_slopePre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude envelope slope",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_env_slope_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.8 Pitch (median)

What is the average pitch (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of the raw pretonic pitch medians per language and prominence rating.
# The table is kept in `avg_pitch_median_pre` and also printed, so it is
# displayed like the summaries of the other features.
avg_pitch_median_pre <- data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_median = mean(pitch_medianPre, na.rm = TRUE))
avg_pitch_median_pre

## Normalized
# Same summary for the normalized pitch medians
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_median_norm = mean(pitch_median_normPre, na.rm = TRUE))

# Plots
## Raw
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 (raw medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the raw-pitch plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_median_raw_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

## Normalized
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = pitch_median_normPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 (normalized medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the normalized-pitch plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_median_norm_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.9 Pitch (sd)

What is the average pitch (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of the raw pretonic pitch sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_sd = mean(pitch_sdPre, na.rm = TRUE))

## Normalized
# Same summary for the normalized pitch sd; the column carries a `_norm`
# suffix so the printed table cannot be mistaken for the raw one
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_pitch_sd_norm = mean(pitch_sd_normPre, na.rm = TRUE))

# Plots
## Raw
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 (raw sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the raw plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_raw_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

## Normalized
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sd_normPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 (normalized sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the normalized plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_norm_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.10 F0 slope

What is the average f0 slope across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of the raw pretonic f0 slope per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_f0_slope = mean(f0_slopePre, na.rm = TRUE))

## Normalized
# Same summary for the normalized f0 slope; the column carries a `_norm`
# suffix so the printed table cannot be mistaken for the raw one
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_f0_slope_norm = mean(f0_slope_normPre, na.rm = TRUE))

# Plots
## Raw
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f0_slopePre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 slope (raw)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the raw plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_f0_slope_raw_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

## Normalized
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f0_slope_normPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic f0 slope (normalized)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the normalized plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_f0_slope_norm_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.11 CPP (median)

What is the average CPP (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic CPP medians per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_CPP_median = mean(CPP_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic CPP (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_CPP_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.12 CPP (sd)

What is the average CPP (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic CPP sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_CPP_sd = mean(CPP_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic CPP (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_CPP_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.13 Flux (median)

What is the average flux (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic flux medians per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_flux_median = mean(flux_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = flux_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic flux (medians)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  # Zoom in because of outliers. coord_cartesian() only changes the visible
  # range, whereas ylim() would discard the out-of-range observations before
  # the densities and quartiles are computed and so distort the distributions.
  coord_cartesian(ylim = c(-0.025, 0.15)) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_flux_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.14 Flux (sd)

What is the average flux (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic flux sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_flux_sd = mean(flux_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = flux_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic flux (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_flux_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.15 Novelty (median)

What is the average novelty (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic novelty medians per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_novelty_median = mean(novelty_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = novelty_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic novelty (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_novelty_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.16 Novelty (sd)

What is the average novelty (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic novelty sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_novelty_sd = mean(novelty_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = novelty_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic novelty (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_novelty_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.17 Spectral centroid (median)

What is the average spectral centroid (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic spectral centroid medians per language and prominence
# rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(
    avg_specCentroid_median = mean(specCentroid_medianPre, na.rm = TRUE)
  )

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = specCentroid_medianPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic spectral centroid (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_specCentroid_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.18 Spectral centroid (sd)

What is the average spectral centroid (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic spectral centroid sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_specCentroid_sd = mean(specCentroid_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = specCentroid_sdPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic spectral centroid (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_specCentroid_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.19 Wiener entropy (median)

What is the average Wiener entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic Wiener entropy medians per language and prominence
# rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropy_median = mean(entropy_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropy_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic Wiener entropy (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_entropy_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.20 Wiener entropy (sd)

What is the average Wiener entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic Wiener entropy sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropy_sd = mean(entropy_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropy_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic Wiener entropy (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_entropy_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.21 Shannon entropy (median)

What is the average Shannon entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic Shannon entropy medians per language and prominence
# rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropySh_median = mean(entropySh_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = entropySh_medianPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic Shannon entropy (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_entropySh_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.22 Shannon entropy (sd)

What is the average Shannon entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic Shannon entropy sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropySh_sd = mean(entropySh_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropySh_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic Shannon entropy (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_entropySh_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.23 Harmonics-to-Noise Ratio (median)

What is the average Harmonics-to-Noise Ratio (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic HNR medians per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_HNR_median = mean(HNR_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic HNR (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_HNR_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.24 Harmonics-to-Noise Ratio (sd)

What is the average Harmonics-to-Noise Ratio (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic HNR sd per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_HNR_sd = mean(HNR_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic HNR (sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder. Saved as PNG like
# every other prominence plot (ggsave picks the device from the extension).
ggsave(
  filename = paste0(plots, "prominence_HNR_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.25 Amplitude modulation (median)

What is the average amplitude modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude modulation medians per language and
# prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_amEnvDep_median = mean(amEnvDep_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(
    aes(x = language, y = amEnvDep_medianPre, fill = as.factor(percProm))
  ) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude modulation (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.26 Amplitude modulation (sd)

What is the average amplitude modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic amplitude modulation sd per language and prominence
# rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_amEnvDep_sd = mean(amEnvDep_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = amEnvDep_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic amplitude modulation (sd)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.27 Frequency modulation (median)

What is the average frequency modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic frequency modulation medians per language and
# prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_fmDep_median = mean(fmDep_medianPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_medianPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Pretonic frequency modulation (medians)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_fmDep_median_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.3.28 Frequency modulation (sd)

What is the average frequency modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean of the pretonic frequency modulation sd per language and prominence
# rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarize(avg_fmDep_sd = mean(fmDep_sdPre, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_sdPre, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  labs(
    x = "Language",
    y = "Pretonic frequency modulation (sd)",
    fill = "Prosodic prominence"
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  # Zoom in because of outliers. coord_cartesian() only changes the visible
  # range, whereas ylim() would discard the out-of-range observations before
  # the densities and quartiles are computed and so distort the distributions.
  coord_cartesian(ylim = c(-0.07, 0.5)) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_fmDep_sd_pretonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.4 Posttonic: Averages prominence

3.4.4.1 Duration

What is the average duration of posttonic across the different prosodic prominence ratings in Catalan vs in German?

# Values
# Mean posttonic duration per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_duration = mean(durationPost, na.rm = TRUE))

# Plot
# Violins with narrow box plots inside, one per prominence rating within each
# language; rows without a prominence rating are dropped before plotting
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = durationPost, fill = as.factor(percProm))) +
  geom_violin(scale = "width", trim = FALSE, alpha = 0.3) +
  geom_boxplot(
    width = 0.1, alpha = 0.5, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    x = "Language",
    y = "Posttonic duration (total)",
    fill = "Prosodic prominence"
  ) +
  theme_minimal()

# Write the plot that was just drawn to the plots folder
ggsave(
  filename = paste0(plots, "prominence_duration_posttonic.png"),
  plot = last_plot(), width = 6, height = 4
)

3.4.4.2 Duration without silences

What is the average posttonic duration without silences across the different prosodic prominence ratings in Catalan vs in German?

# Mean of duration_noSilencePost per language and prominence rating
# (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(
    avg_duration_noSilence = mean(duration_noSilencePost, na.rm = TRUE)
  )

# Violin + box plot of duration_noSilencePost by language, filled by
# prominence rating (rows with a missing rating are excluded; box plot
# outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = duration_noSilencePost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic duration (without silences)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_duration_noSilence_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.3 Amplitude (median)

What is the average amplitude (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of ampl_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_median = mean(ampl_medianPost, na.rm = TRUE))

# Violin + box plot of ampl_medianPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.4 Amplitude (sd)

What is the average amplitude (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of ampl_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_sd = mean(ampl_sdPost, na.rm = TRUE))

# Violin + box plot of ampl_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = ampl_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.5 Amplitude (median) without silences

What is the average amplitude (median) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Mean of ampl_noSilence_medianPost per language and prominence rating
# (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(
    avg_ampl_noSilence_median = mean(ampl_noSilence_medianPost, na.rm = TRUE)
  )

# Violin + box plot of ampl_noSilence_medianPost by language, filled by
# prominence rating (rows with a missing rating are excluded; box plot
# outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = ampl_noSilence_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude without silences (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.6 Amplitude (sd) without silences

What is the average amplitude (sd) without silences across the different prosodic prominence ratings in Catalan vs in German?

# Mean of ampl_noSilence_sdPost per language and prominence rating
# (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_ampl_noSilence_sd = mean(ampl_noSilence_sdPost, na.rm = TRUE))

# Violin + box plot of ampl_noSilence_sdPost by language, filled by
# prominence rating (rows with a missing rating are excluded; box plot
# outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = ampl_noSilence_sdPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude without silences (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_ampl_noSilence_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.7 Amplitude envelope slope

What is the average amplitude envelope slope across the different prosodic prominence ratings in Catalan vs in German?

# Mean of env_slopePost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_env_slope = mean(env_slopePost, na.rm = TRUE))

# Violin + box plot of env_slopePost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = env_slopePost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude envelope slope",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_env_slope_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.8 Pitch (median)

What is the average pitch (median) across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of pitch_medianPost per language and prominence rating (NAs ignored).
# Stored under a posttonic name, since the values come from pitch_medianPost,
# and printed explicitly so the table is displayed like the other summaries.
avg_pitch_median_post <- data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_pitch_median = mean(pitch_medianPost, na.rm = TRUE))
avg_pitch_median_post

## Normalized
# Mean of pitch_median_normPost per language and prominence rating
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_pitch_median_norm = mean(pitch_median_normPost, na.rm = TRUE))

# Plots
## Raw
# Violin + box plot of pitch_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 (raw medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_median_raw_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

## Normalized
# Same plot for the normalized column pitch_median_normPost
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = pitch_median_normPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 (normalized medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_median_norm_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.9 Pitch (sd)

What is the average pitch (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of pitch_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_pitch_sd = mean(pitch_sdPost, na.rm = TRUE))

## Normalized
# Mean of pitch_sd_normPost; the summary column carries a _norm suffix so the
# normalized table cannot be mistaken for the raw one
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_pitch_sd_norm = mean(pitch_sd_normPost, na.rm = TRUE))

# Plots
## Raw
# Violin + box plot of pitch_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 (raw sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_raw_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

## Normalized
# Same plot for the normalized column pitch_sd_normPost
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = pitch_sd_normPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 (normalized sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_pitch_sd_norm_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.10 F0 slope

What is the average f0 slope across the different prosodic prominence ratings in Catalan vs in German?

# Values
## Raw
# Mean of f0_slopePost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_f0_slope = mean(f0_slopePost, na.rm = TRUE))

## Normalized
# Mean of f0_slope_normPost; the summary column carries a _norm suffix so the
# normalized table cannot be mistaken for the raw one
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_f0_slope_norm = mean(f0_slope_normPost, na.rm = TRUE))

# Plots
## Raw
# Violin + box plot of f0_slopePost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f0_slopePost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 slope (raw)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_f0_slope_raw_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

## Normalized
# Same plot for the normalized column f0_slope_normPost
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = f0_slope_normPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic f0 slope (normalized)",
    x = "Language"
  ) +
  # Zoom the y axis with coord_cartesian() rather than ylim(): ylim() would
  # discard out-of-range observations before the violin densities and box
  # plot quartiles are computed, whereas this only changes the visible range.
  coord_cartesian(ylim = c(-6, 5)) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_f0_slope_norm_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.11 CPP (median)

What is the average CPP (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of CPP_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_CPP_median = mean(CPP_medianPost, na.rm = TRUE))

# Violin + box plot of CPP_medianPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic CPP (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_CPP_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.12 CPP (sd)

What is the average CPP (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of CPP_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_CPP_sd = mean(CPP_sdPost, na.rm = TRUE))

# Violin + box plot of CPP_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = CPP_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic CPP (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_CPP_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.13 Flux (median)

What is the average flux (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of flux_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_flux_median = mean(flux_medianPost, na.rm = TRUE))

# Violin + box plot of flux_medianPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = flux_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic flux (medians)",
    x = "Language"
  ) +
  # Zoom the y axis because of outliers. coord_cartesian() only changes the
  # visible range, whereas ylim() would discard out-of-range observations
  # before the violin densities and box plot quartiles are computed.
  coord_cartesian(ylim = c(-0.025, 0.15)) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_flux_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.14 Flux (sd)

What is the average flux (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of flux_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_flux_sd = mean(flux_sdPost, na.rm = TRUE))

# Violin + box plot of flux_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = flux_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic flux (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_flux_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.15 Novelty (median)

What is the average novelty (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of novelty_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_novelty_median = mean(novelty_medianPost, na.rm = TRUE))

# Violin + box plot of novelty_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = novelty_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic novelty (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_novelty_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.16 Novelty (sd)

What is the average novelty (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of novelty_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_novelty_sd = mean(novelty_sdPost, na.rm = TRUE))

# Violin + box plot of novelty_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = novelty_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic novelty (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_novelty_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.17 Spectral centroid (median)

What is the average spectral centroid (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of specCentroid_medianPost per language and prominence rating
# (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(
    avg_specCentroid_median = mean(specCentroid_medianPost, na.rm = TRUE)
  )

# Violin + box plot of specCentroid_medianPost by language, filled by
# prominence rating (rows with a missing rating are excluded; box plot
# outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = specCentroid_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic spectral centroid (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_specCentroid_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.18 Spectral centroid (sd)

What is the average spectral centroid (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of specCentroid_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_specCentroid_sd = mean(specCentroid_sdPost, na.rm = TRUE))

# Violin + box plot of specCentroid_sdPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = specCentroid_sdPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic spectral centroid (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_specCentroid_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.19 Wiener entropy (median)

What is the average Wiener entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropy_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropy_median = mean(entropy_medianPost, na.rm = TRUE))

# Violin + box plot of entropy_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = entropy_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic Wiener entropy (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder; the file name uses
# the "_posttonic" suffix (previously misspelled "postonic")
ggsave(
  filename = paste0(plots, "prominence_entropy_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.20 Wiener entropy (sd)

What is the average Wiener entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropy_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropy_sd = mean(entropy_sdPost, na.rm = TRUE))

# Violin + box plot of entropy_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropy_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic Wiener entropy (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder; the file name uses
# the "_posttonic" suffix (previously misspelled "postonic")
ggsave(
  filename = paste0(plots, "prominence_entropy_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.21 Shannon entropy (median)

What is the average Shannon entropy (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropySh_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropySh_median = mean(entropySh_medianPost, na.rm = TRUE))

# Violin + box plot of entropySh_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = entropySh_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic Shannon entropy (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder; the file name uses
# the "_posttonic" suffix (previously misspelled "postonic")
ggsave(
  filename = paste0(plots, "prominence_entropySh_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.22 Shannon entropy (sd)

What is the average Shannon entropy (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of entropySh_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_entropySh_sd = mean(entropySh_sdPost, na.rm = TRUE))

# Violin + box plot of entropySh_sdPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = entropySh_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic Shannon entropy (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder; the file name uses
# the "_posttonic" suffix (previously misspelled "postonic")
ggsave(
  filename = paste0(plots, "prominence_entropySh_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.23 Harmonics-to-Noise Ratio (median)

What is the average Harmonics-to-Noise Ratio (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of HNR_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_HNR_median = mean(HNR_medianPost, na.rm = TRUE))

# Violin + box plot of HNR_medianPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic HNR (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_HNR_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.24 Harmonics-to-Noise Ratio (sd)

What is the average Harmonics-to-Noise Ratio (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of HNR_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_HNR_sd = mean(HNR_sdPost, na.rm = TRUE))

# Violin + box plot of HNR_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = HNR_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic HNR (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_HNR_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.25 Amplitude modulation (median)

What is the average amplitude modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of amEnvDep_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_amEnvDep_median = mean(amEnvDep_medianPost, na.rm = TRUE))

# Violin + box plot of amEnvDep_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(
    x = language,
    y = amEnvDep_medianPost,
    fill = as.factor(percProm)
  )) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude modulation (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.26 Amplitude modulation (sd)

What is the average amplitude modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of amEnvDep_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_amEnvDep_sd = mean(amEnvDep_sdPost, na.rm = TRUE))

# Violin + box plot of amEnvDep_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = amEnvDep_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic amplitude modulation (sd)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_amEnvDep_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.27 Frequency modulation (median)

What is the average frequency modulation (median) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of fmDep_medianPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_fmDep_median = mean(fmDep_medianPost, na.rm = TRUE))

# Violin + box plot of fmDep_medianPost by language, filled by prominence
# rating (rows with a missing rating are excluded; outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_medianPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic frequency modulation (medians)",
    x = "Language"
  ) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_fmDep_median_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

3.4.4.28 Frequency modulation (sd)

What is the average frequency modulation (sd) across the different prosodic prominence ratings in Catalan vs in German?

# Mean of fmDep_sdPost per language and prominence rating (NAs ignored)
data_prepost %>%
  group_by(language, percProm) %>%
  summarise(avg_fmDep_sd = mean(fmDep_sdPost, na.rm = TRUE))

# Violin + box plot of fmDep_sdPost by language, filled by prominence rating
# (rows with a missing rating are excluded; box plot outliers are not drawn)
data_prepost %>%
  filter(!is.na(percProm)) %>%
  ggplot(aes(x = language, y = fmDep_sdPost, fill = as.factor(percProm))) +
  geom_violin(alpha = 0.3, trim = FALSE, scale = "width") +
  geom_boxplot(
    alpha = 0.5, width = 0.1, outlier.shape = NA,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = colorBlindBlack8) +
  labs(
    fill = "Prosodic prominence",
    y = "Posttonic frequency modulation (sd)",
    x = "Language"
  ) +
  # Zoom the y axis because of outliers. coord_cartesian() only changes the
  # visible range, whereas ylim() would discard out-of-range observations
  # before the violin densities and box plot quartiles are computed.
  coord_cartesian(ylim = c(-0.1, 0.6)) +
  theme_minimal()

# Save the most recent plot as a PNG in the plots folder
ggsave(
  filename = paste0(plots, "prominence_fmDep_sd_posttonic.png"),
  plot = last_plot(),
  width = 6,
  height = 4
)

This concludes this part of the analysis.

4 Session info

# Print the R version, platform, locale and attached/loaded package versions
# of the session that produced this document, for reproducibility
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=German_Germany.utf8  LC_CTYPE=German_Germany.utf8   
## [3] LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.utf8    
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3   ggpubr_0.6.0    ggforce_0.4.2   lubridate_1.9.3
##  [5] forcats_1.0.0   dplyr_1.1.4     purrr_1.0.2     readr_2.1.5    
##  [9] tidyr_1.3.1     ggplot2_3.5.1   tidyverse_2.0.0 stringr_1.5.1  
## [13] tibble_3.2.1   
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.5      xfun_0.48         bslib_0.8.0       rstatix_0.7.2    
##  [5] tzdb_0.4.0        vctrs_0.6.5       tools_4.4.1       generics_0.1.3   
##  [9] parallel_4.4.1    fansi_1.0.6       highr_0.11        pkgconfig_2.0.3  
## [13] lifecycle_1.0.4   compiler_4.4.1    farver_2.1.2      textshaping_0.4.0
## [17] munsell_0.5.1     carData_3.0-5     htmltools_0.5.8.1 sass_0.4.9       
## [21] yaml_2.3.10       Formula_1.2-5     pillar_1.9.0      car_3.1-3        
## [25] crayon_1.5.3      jquerylib_0.1.4   MASS_7.3-60.2     cachem_1.1.0     
## [29] abind_1.4-8       tidyselect_1.2.1  digest_0.6.37     stringi_1.8.4    
## [33] labeling_0.4.3    polyclip_1.10-7   fastmap_1.2.0     grid_4.4.1       
## [37] colorspace_2.1-1  cli_3.6.3         magrittr_2.0.3    utf8_1.2.4       
## [41] broom_1.0.7       withr_3.0.1       scales_1.3.0      backports_1.5.0  
## [45] bit64_4.5.2       timechange_0.3.0  rmarkdown_2.28    bit_4.5.0        
## [49] ggsignif_0.6.4    ragg_1.3.3        hms_1.1.3         evaluate_1.0.0   
## [53] knitr_1.48        rlang_1.1.4       Rcpp_1.0.13       glue_1.8.0       
## [57] tweenr_2.0.3      rstudioapi_0.16.0 vroom_1.6.5       jsonlite_1.8.9   
## [61] R6_2.5.1          systemfonts_1.1.0